Time to Restore - 2025 data

Author

Erin Zylstra

Published

April 14, 2025

Identifying priority species

Used list of priority species based on: TimeToRestore_AllPrioritySpecies_2024 google sheet (extracted on 2 April 2025) and made the following changes:

  • Corrected the spelling of one genus (Vernonia) and one species (cespitosa)

  • Updated scientific and common names based on ITIS for mistflower (Conoclinium), basket-flower (Centaurea), and Lantana; will use the common name listed in ITIS/NPN data base for these species imoving forward.

  • Using the common name listed in NPN database for Sambucus nigra and Passiflora incarnata, which were each listed twice in the googlesheet under two different common names.

Code
# Load list of priority species
spp_list <- read.csv("data/ttr-priorityspecies-20250402.csv", 
                     na.strings = c(NA, ""))
spp_list <- spp_list %>%
  mutate(across(c(common_name, scientific_name), str_trim)) %>%
  # Edit spelling of one genus (Vernonia) and one species (cespitosa)
  # Update species based on ITIS for mistflower (Conoclinium), 
  # basket-flower (Centaurea), and Lantana
  mutate(scientific_name = case_when(
    scientific_name == "Oenothera caespitosa" ~ "Oenothera cespitosa",
    scientific_name == "Veronia gigantea" ~ "Vernonia gigantea",
    scientific_name == "Conoclinium greggii" ~ "Conoclinium dissectum",
    scientific_name == "Centaurea americana" ~ "Plectocephalus americanus",
    scientific_name == "Lantana urticoides" ~ "Lantana horrida", 
    .default = scientific_name
  )) %>%
  rename(ttr_common_name = common_name)

# Load information about NN species
nn_spp <- npn_species() %>% data.frame()
nn_spp <- nn_spp %>%
  filter(kingdom == "Plantae") %>%
  select(species_id, common_name, genus, species, functional_type) %>%
  mutate(scientific_name = paste(genus, species))

# Find NN info based on scientific name of priority species (inconsistent 
# capitalization in priority list and a duplicate in NN database [Canada goldenrod])
spp_list <- spp_list %>%
  left_join(nn_spp, by = "scientific_name")
# Check that all priority species have a match in NN database
  # filter(spp_list, is.na(species_id))
# Are there any duplicates? Yes
  # count(spp_list, species_id) %>% filter(n > 1)
  # filter(spp_list, species_id == 90) 
  # In priority list, Sambucus nigra listed as both black and common elderberry
  # filter(spp_list, species_id == 182) 
  # In priority list, Passiflora incarnata listed as both purple passionflower and Maypop

# Remove entries for TTR priority species whose common name doesn't match NPN
spp_list <- spp_list %>%
  filter(!ttr_common_name %in% c("Maypop", "Common elderberry"))

This left us with 53 priority species.

Downloading status-intensity data

Used rnpn package to download status and intensity data for priority species in Louisiana, New Mexico, Oklahoma, and Texas from 1 October 2023 through the present day. Downloaded observations of 4 phenophases: flowers or flower buds (flower), open flowers, fruits, and ripe fruits. Since leaves on milkweed plants may be important for monarch eggs and catepillars, we also downloaded observations of the leaves phenophase for the 8 species of milkweed on the priority list. After downloading the data, we appended information about intensity categories.

Code
# First, check that all species have these 4 phenophases
phenophases_byspp <- npn_phenophases_by_species(
  species_ids = c(spp_list$species_id),
  date = "2025-01-01"
) %>% data.frame()
phenophases_byspp %>%
  group_by(species_id, species_name) %>%
  summarize(p500 = ifelse(500 %in% phenophase_id, 1, 0),
            p501 = ifelse(501 %in% phenophase_id, 1, 0),
            p516 = ifelse(516 %in% phenophase_id, 1, 0),
            p390 = ifelse(390 %in% phenophase_id, 1, 0),
            .groups = "keep") %>%
  rowwise() %>%
  filter(sum(c_across(p500:p390)) < 4)
# All species use these 4 phenophases

# What phenophases do the milkweeds use?
milkweed_phps <- phenophases_byspp %>%
  filter(str_detect(species_name, "milkweed")) %>%
  select(species_id, species_name, pheno_class_id, phenophase_id, phenophase_name) %>%
  arrange(species_name, pheno_class_id, phenophase_id)
# Since leaves may be important for monarch eggs and catepillars, we may also 
# want to include:
# 488 = Leaves (for milkweeds only)

# Download and format (or load existing) NPN data for priority plant species --#
  
# Observations in 4 states (LA, NM, OK, TX)
# Focus on 2025 data, but also download 2024 data (for fruits and/or comparison)

phenophases <- c(500, 501, 516, 390)
states4 <- c("LA", "NM", "OK", "TX")
  # Minor note: we could be missing observations in the four states if we use
  # the states argument in the download function because sometimes the state 
  # field is missing or incorrect. Won't worry about that here, but would be 
  # better in the long run to check state values based on lat/lons

data_filename <- "data/ttr-data-20242025.csv"

if (!file.exists(data_filename) | update_data == TRUE) {
  
  # Download flowering, fruiting data for 2024-2025 (including 2023 so that 
  # we can calculated individual phenometrics for 2024, if needed)
  status_dl <- npn_download_status_data(
    request_source = "erinz",
    years = 2023:2025,
    species_ids = spp_list$species_id,
    phenophase_ids= phenophases,
    states = states4,
    additional_fields = c("observedby_person_id",
                          "partner_group",
                          "site_name", 
                          "species_functional_type"))
  status_dl <- data.frame(status_dl)
  
  # Download leafing data for milkweeds in 2024-2025
  milkweeds <- spp_list %>%
    filter(str_detect(common_name, "milkweed")) %>%
    pull(species_id)
  status_mwleaf_dl <- npn_download_status_data(
    request_source = "erinz",
    years = 2023:2025,
    species_ids = milkweeds,
    phenophase_ids= 488,
    states = states4,
    additional_fields = c("observedby_person_id",
                          "partner_group",
                          "site_name", 
                          "species_functional_type"))
  status_mwleaf_dl <- data.frame(status_mwleaf_dl)
  
  # Combine everything and format
  status_df <- rbind(status_dl, status_mwleaf_dl) %>%
    mutate(obsdate = ymd(observation_date),
           yr = year(obsdate),
           php = case_when(
             phenophase_id == 500 ~ "flower",
             phenophase_id == 501 ~ "open flower",
             phenophase_id == 516 ~ "fruit",
             phenophase_id == 390 ~ "ripe fruit",
             phenophase_id == 488 ~ "leaves")) %>%
    filter(obsdate >= "2023-10-01") %>%
    select(-c(update_datetime, elevation_in_meters, genus, species, kingdom,
              phenophase_description, abundance_value, observation_date)) %>%
    rename(person_id = observedby_person_id,
           func_type = species_functional_type,
           lat = latitude,
           lon = longitude)
  
  # Write to file
  write.csv(status_df, data_filename, row.names = FALSE)
  # Remove objects
  rm(status_df, status_dl, status_mwleaf_dl)
}

status <- read.csv(data_filename) %>%
  mutate(obsdate = ymd(obsdate),
         wy = ifelse(obsdate >= "2024-10-01", 2025, 2024))

For the purposes of this report, we focused on observations submitted in the current water year, from 1 October 2024 - 29 March 2025, the last date that observations were available. Throughout this report, we often compared current water-year data with data collected during the previous water year (October 2023 - September 2024), while keeping in mind that we are only partially through the current water year.

Code
# Download information about intensity categories
ic <- npn_abundance_categories() %>% data.frame()
ic <- ic %>%
  rename(intensity_category_id = category_id, 
         intensity_value_id = value_id,
         intensity_name = category_name,
         intensity_value = value_name) %>%
  select(-c(category_description, value_description))

# Extract just those categories that appear in status data and format:
ic_subset <- ic %>%
  filter(intensity_category_id %in% unique(status$intensity_category_id)) %>%
  mutate(value1 = NA,
         value2 = NA,
         intensity_type = case_when(
           str_detect(intensity_value, "%") ~ "percent",
           str_detect(intensity_value, "[0-9]") ~ "number",
           .default = "qualitative"
         ))
val12 <- which(colnames(ic_subset) %in% c("value1", "value2"))
for (i in 1:nrow(ic_subset)) {
  if (str_detect(ic_subset$intensity_value[i], " to ")) {
    ic_subset[i, val12] <- str_split_fixed(ic_subset$intensity_value[i], " to ", 2)
    ic_subset[i, val12] <- as.numeric(str_remove(ic_subset[i, val12], ","))
  } else if (str_detect(ic_subset$intensity_value[i], "-")) {
    ic_subset[i, val12] <- str_split_fixed(ic_subset$intensity_value[i], "-", 2)
    ic_subset[i, val12[2]] <- str_remove(ic_subset[i, val12[2]], "%")
  } else if (str_detect(ic_subset$intensity_value[i], "% or more")) {
    ic_subset[i, val12] <- str_remove(ic_subset$intensity_value[i], "% or more")
  } else if (str_detect(ic_subset$intensity_value[i], "Less than ")) {
    ic_subset[i, val12[1]] <- 0
    ic_subset[i, val12[2]] <- str_remove(ic_subset$intensity_value[i], "Less than ")
    ic_subset[i, val12[2]] <- str_remove(ic_subset[i, val12[2]], "%")
  } else if (str_detect(ic_subset$intensity_value[i], "More than ")) {
    ic_subset[i, val12] <- str_remove(ic_subset$intensity_value[i], "More than ")
    ic_subset[i, val12[1]] <- as.numeric(str_remove(ic_subset[i, val12[1]], ",")) + 1
    ic_subset[i, val12[2]] <- as.numeric(str_remove(ic_subset[i, val12[2]], ",")) + 1
  }
}
ic_subset <- ic_subset %>%
  mutate_at(c("value1", "value2"), as.numeric)

# Assigning a middle-ish value for each range (keeping it to nice numbers like 
# 5, 50, 500, and 5000)
ic_subset <- ic_subset %>%
  mutate(mag = nchar(value1) - 1) %>%
  mutate(value = case_when(
    value1 == value2 ~ round(value1),
    intensity_type == "number" & value1 == 0 ~ 1,
    intensity_type == "number" & value1 != 0 ~ 
      round_any(rowMeans(across(value1:value2)), 5 * (10 ^ mag)),
    intensity_type == "percent" ~ round(rowMeans(across(value1:value2))),
    .default = NA
  )) %>%
  select(-c(mag, value1, value2))

ic_append <- ic_subset %>%
  select(intensity_category_id, intensity_name, intensity_value, value) %>%
  rename(intensity_cat = intensity_value, 
         intensity = value)

status <- status %>%
  left_join(ic_append, 
            by = c("intensity_category_id", 
                   "intensity_value" = "intensity_cat")) %>%
  select(-intensity_category_id)

Identifying inconsistent phenophase status reports

We wanted to identify when observers provided incompatible status reports for different phenophases. In particular, we identified when observers reported a “no” to flowers but reported a “yes” or “?” to open flowers. Similarly, we identified when observers reported a “no” to fruits but reported a “yes” or “?” to ripe fruits. We also identified when observers reported a “?” to flowers but reported a “yes” to open flowers, and when observers reported a “?” to fruits but reported a “yes” to ripe fruits.

Code
# To look at this, can't have more than one observation of a plant per person
# per day. We've already removed duplicates, but now need to resolve instances 
# where somebody made multiple observations of the same plant on the same date 
# that differed in some way.

  # For now, will keep record with more advanced phenophase or higher 
  # intensity value. Will do this by sorting observations in descending
  # order and keeping only the first
  inddateobsp <- status %>%
    group_by(common_name, individual_id, obsdate, person_id, php) %>%
    summarize(n_obs = n(),
              .groups = "keep") %>%
    data.frame()
  inddateobsp$obsnum <- 1:nrow(inddateobsp)
  
  status <- status %>%
    arrange(person_id, individual_id, obsdate, php, 
            desc(phenophase_status), desc(intensity)) %>%
    left_join(select(inddateobsp, -c(n_obs, common_name)), 
              by = c("person_id", "individual_id", 
                     "obsdate", "php")) %>%
    # Create "dups" column, where dups > 1 indicates that the observation can be
    # removed since there's another observation that same day with more advanced
    # phenology or higher intensity/abundance.
    mutate(dups = sequence(rle(as.character(obsnum))$lengths))
  
  # Remove extra observations and unnecessary columns
  status <- status %>%
    filter(dups == 1) %>%
    select(-c(obsnum, dups)) %>%
    arrange(common_name, obsdate, person_id, php)

# To identify inconsistent status values, will need to put flower/fruit data 
# into wide form (all data for a plant visit in the same row). Removing
# unknown status observations first (<0.5% of fruit/flower observations).
statusw <- status %>%
  filter(php != "leaves") %>%
  filter(phenophase_status != -1) %>%
  select(person_id, partner_group, site_id, state, common_name, individual_id,
         wy, obsdate, php, phenophase_status, intensity) %>%
  rename(status = phenophase_status) %>%
  pivot_wider(names_from = php,
              names_glue = "{php}_{.value}",
              values_from = c(status, intensity)) %>%
  data.frame()

# Identify phenophase status inconsistencies
# NOTE: changing NAs to 999 in order to make this code simpler
statusw <- statusw %>%
  mutate(across(contains("status"), ~replace_na(., 999))) %>%
  # Problem: flower = 0, open = NA or 1
  mutate(flower0_openNot0 = ifelse(flower_status == 0 & open.flower_status != 0, 
                                   1, 0)) %>%
  # Problem: flower = NA, open = 1
  mutate(flowerNA_open1 = ifelse(flower_status == 999 & open.flower_status == 1,
                                 1, 0)) %>%
  # Problem: fruit = 0, ripe = NA or 1
  mutate(fruit0_ripeNot0 = ifelse(fruit_status == 0 & ripe.fruit_status != 0, 
                                   1, 0)) %>%
  # Problem: fruit = NA, ripe = 1
  mutate(fruitNA_ripe1 = ifelse(fruit_status == 999 & ripe.fruit_status == 1,
                                1, 0))

# Table summarizing problems with flower/open flower status
flower_probs <- statusw %>%
  group_by(common_name, wy) %>%
  summarize(n = n(),
            n_flower0 = sum(flower_status == 0),
            n_flower1 = sum(flower_status == 1),
            n_flowerNA = sum(flower_status == 999),
            n_open0 = sum(open.flower_status == 0),
            n_open1 = sum(open.flower_status == 1),
            n_openNA = sum(open.flower_status == 999),
            n_flower0_openNot0 = sum(flower0_openNot0 == 1),
            n_flowerNA_open1 = sum(flowerNA_open1 == 1),
            .groups = "keep") %>%
  filter(n_flower0_openNot0 + n_flowerNA_open1 > 0) %>%
  data.frame()

# Table summarizing problems with fruit/ripe fruit status
fruit_probs <- statusw %>%
  group_by(common_name, wy) %>%
  summarize(n = n(),
            n_fruit0 = sum(fruit_status == 0),
            n_fruit1 = sum(fruit_status == 1),
            n_fruitNA = sum(fruit_status == 999),
            n_ripe0 = sum(ripe.fruit_status == 0),
            n_ripe1 = sum(ripe.fruit_status == 1),
            n_ripeNA = sum(ripe.fruit_status == 999),
            n_fruit0_ripeNot0 = sum(fruit0_ripeNot0 == 1),
            n_fruitNA_ripe1 = sum(fruitNA_ripe1 == 1),
            .groups = "keep") %>%
  filter(n_fruit0_ripeNot0 + n_fruitNA_ripe1 > 0) %>%
  data.frame()
Table 3: Inconsistent reports of flowering phenophase statuses by species and water year
Species Water year No. observations Flowers:no AND Open flowers:yes/? Flowers:? AND Open flowers:yes
American beautyberry 2024 585 0 28
American beautyberry 2025 386 2 0
common buttonbush 2024 300 2 0
common buttonbush 2025 163 1 0
common sunflower 2024 156 1 0
eastern baccharis 2025 64 1 0
eastern purple coneflower 2024 34 1 0
eastern redbud 2024 462 4 2
horsetail milkweed 2024 147 1 0
purple prairie clover 2024 250 0 17
red maple 2024 497 1 0
red maple 2025 291 2 0
rubber rabbitbrush 2024 178 3 0
wax mallow 2025 27 1 0
Table 4: Inconsistent reports of fruiting phenophase statuses
Species Water year No. observations Fruit:no AND Ripe fruit:yes/? Fruit:? AND Ripe fruit:yes
American star-thistle 2025 7 1 0
blackeyed Susan 2024 1 1 0
blackeyed Susan 2025 7 1 0
common buttonbush 2024 300 1 0
common sunflower 2024 156 4 0
common sunflower 2025 53 1 0
eastern baccharis 2024 114 1 0
eastern redbud 2024 462 1 0
eastern redbud 2025 206 2 0
horsetail milkweed 2024 147 1 0
horsetail milkweed 2025 40 1 0
purple passionflower 2025 12 1 0
rubber rabbitbrush 2025 59 1 0
white crownbeard 2025 75 1 0

Summary of sites monitored

Code
# Plot locations where flowering/fruiting or milkweed leaves observed in current
# water year (Oct 2024 and present)
status <- status %>%
  mutate(ind_date = paste0(individual_id, "_", obsdate)) 

locs <- status %>%
  filter(wy == 2025) %>%
  group_by(site_id, lat, lon, state) %>%
  summarize(nspp = n_distinct(common_name),
            nplants = n_distinct(individual_id),
            nobservers = n_distinct(person_id),
            # nobs: Number of observations, where an observations is all
            # data (all phenophases) submitted for a plant on given date
            nobs = n_distinct(ind_date), 
            .groups = "keep") %>%
  data.frame()

locs_by_state_int <- locs %>%
  group_by(state) %>%
  summarize(nspp_per_site = round(mean(nspp), 2),
            nplants_per_site = round(mean(nplants), 2),
            nobs_per_site = round(mean(nobs),2)) %>%
  data.frame()

locs_by_state <- status %>%
  filter(wy == 2025) %>%
  group_by(state) %>%
  summarize(nsites = n_distinct(site_id),
            nspp = n_distinct(common_name),
            nplants = n_distinct(individual_id),
            nobs = n_distinct(ind_date)) %>%
  data.frame() %>%
  left_join(locs_by_state_int, by = "state")
Figure 1: Locations of sites where priority species were monitored between October 2024 and present.
Figure 2: Locations of sites in Louisiana where priority species were monitored between October 2024 and present.
Figure 3: Locations of sites in New Mexico where priority species were monitored between October 2024 and present.
Figure 4: Locations of sites in Oklahoma where priority species were monitored between October 2024 and present.
Figure 5: Locations of sites in Texas where priority species were monitored between October 2024 and present.
Table 5: Summary of sites monitored in each state from October 2024 to present. Here, an observation is all data collected for a plant on a given date.
State No. sites No. species No. plants No. observations Mean no. species per site Mean no. plants per site Mean no. of observations per site
LA 12 10 55 621 2.50 4.58 51.75
NM 5 6 14 183 2.00 2.80 36.60
OK 2 2 6 116 1.00 3.00 58.00
TX 63 29 159 858 2.24 2.52 13.62

Summary of species monitored

Table 6: Summary of plants monitored from October 2024 to present. Here, an observation is all data collected for a plant on a given date.
Species Functional group No. plants: LA No. plants: NM No. plants: OK No. plants: TX Total no. plants No. sites Mean no. observations
American beautyberry Deciduous broadleaf 8 0 0 33 41 27 9.1
red maple Deciduous broadleaf 22 0 0 2 24 9 11.9
eastern redbud Deciduous broadleaf 2 1 1 18 22 20 9.0
common buttonbush Deciduous broadleaf 10 0 0 7 17 13 8.0
wax mallow Deciduous broadleaf 0 0 0 9 9 8 3.0
eastern baccharis Deciduous broadleaf 6 0 0 0 6 4 10.7
West Indian shrubverbena Deciduous broadleaf 0 0 0 4 4 4 1.8
black elderberry Deciduous broadleaf 1 0 0 0 1 1 1.0
trumpet honeysuckle Deciduous broadleaf 0 0 0 1 1 1 34.0
rubber rabbitbrush Drought deciduous broadleaf 0 4 0 0 4 2 14.8
Texas lupine Forb 0 0 0 14 14 13 4.4
white crownbeard Forb 2 0 0 8 10 9 7.5
blue mistflower Forb 0 0 0 8 8 8 2.1
mealycup sage Forb 0 0 0 7 7 6 2.4
wild bergamot Forb 1 0 0 5 6 6 4.0
common sunflower Forb 0 2 0 3 5 5 10.6
firewheel Forb 0 0 0 5 5 5 5.2
purple prairie clover Forb 0 0 5 0 5 1 23.0
purple passionflower Forb 1 0 0 3 4 4 3.0
American star-thistle Forb 0 0 0 3 3 3 2.0
blackeyed Susan Forb 0 0 0 3 3 3 2.3
green antelopehorn Forb 0 0 0 3 3 3 5.0
horsetail milkweed Forb 0 3 0 0 3 2 13.3
spider milkweed Forb 0 0 0 3 3 2 2.7
Canada goldenrod Forb 0 0 0 2 2 2 1.5
aquatic milkweed Forb 0 0 0 2 2 2 6.5
broadleaf milkweed Forb 0 2 0 0 2 1 10.0
button eryngo Forb 2 0 0 0 2 1 8.0
eastern purple coneflower Forb 0 0 0 2 2 2 2.0
lemon beebalm Forb 0 0 0 2 2 2 1.0
palmleaf thoroughwort Forb 0 0 0 2 2 2 3.0
showy milkweed Forb 0 2 0 0 2 2 15.0
golden crownbeard Forb 0 0 0 1 1 1 1.0
upright prairie coneflower Forb 0 0 0 1 1 1 1.0
turkey tangle fogfruit Semi-evergreen forb 0 0 0 6 6 6 2.2
seaside goldenrod Semi-evergreen forb 0 0 0 1 1 1 6.0
straggler daisy Semi-evergreen forb 0 0 0 1 1 1 1.0

Summary of observers

Table 7: Summary of people who submitted data for priority species since October 2024, by state. Values in table are means, with range of values in parentheses.
State No. observers No. species per observer No. plants per observer No. plants per species
LA 16 3.44 (1 - 6) 8.12 (1 - 21) 2.43 (1 - 7)
NM 18 1.89 (1 - 5) 2.44 (1 - 8) 1.14 (1 - 1.6)
OK 7 1 (1 - 1) 4.43 (1 - 5) 4.43 (1 - 5)
TX 59 2.37 (1 - 16) 3.22 (1 - 20) 1.62 (1 - 5)


Figure 6: Number of species each observer monitored since October 2024.


Figure 7: Number of plants per species that observers monitored since October 2024

Observation frequency: Reporting a “no” prior to the first “yes”

To evaluate observation frequency, we downloaded individual phenometrics data for priority species from 1 October 2023 through the present day.

Code
# Download individual phenometrics data
ip_filename <- "data/ttr-ipdata-20242025.csv"

if (!file.exists(ip_filename) | update_data == TRUE) {
  
  # Download flowering, fruiting data for Oct 2024 - April 2025 and Oct 2023 - 
  # Sep 2024 (phenometrics aggregated over water year).
  # After some experimenting, it looks like when requesting data by water year, 
  # the years argument corresponds to the start of each year in October. So we
  # need to use years = 2023:2024 for this call.
  ip_dl <- npn_download_individual_phenometrics(
    request_source = "erinz",
    years = 2023:2024,
    period_start = "10-01",
    period_end = "09-30",
    species_ids = spp_list$species_id,
    phenophase_ids= phenophases,
    states = states4,
    additional_fields = c("observedby_person_id",
                          "partner_group",
                          "site_name", 
                          "species_functional_type"))
  ip_dl <- data.frame(ip_dl)
  
  # Download leafing data for milkweeds in 2024-2025
  milkweeds <- spp_list %>%
    filter(str_detect(common_name, "milkweed")) %>%
    pull(species_id)
  ip_mwleaf_dl <- npn_download_individual_phenometrics(
    request_source = "erinz",
    years = 2023:2024,
    period_start = "10-01",
    period_end = "09-30",
    species_ids = milkweeds,
    phenophase_ids= 488,
    states = states4,
    additional_fields = c("observedby_person_id",
                          "partner_group",
                          "site_name", 
                          "species_functional_type"))
  ip_mwleaf_dl <- data.frame(ip_mwleaf_dl)
  
  # Combine everything and format
  ip_df <- rbind(ip_dl, ip_mwleaf_dl) %>%
    mutate(php = case_when(
      phenophase_id == 500 ~ "flower",
      phenophase_id == 501 ~ "open flower",
      phenophase_id == 516 ~ "fruit",
      phenophase_id == 390 ~ "ripe fruit",
      phenophase_id == 488 ~ "leaves")) %>%
    select(-c(observedby_person_id, elevation_in_meters, genus, species, kingdom,
              phenophase_description, first_yes_month, first_yes_day,
              first_yes_julian_date, last_yes_year, last_yes_month, 
              last_yes_day, last_yes_julian_date, numdays_until_next_no)) %>%
    rename(func_type = species_functional_type,
           lat = latitude,
           lon = longitude,
           prior_no = numdays_since_prior_no)
  
  # Write to file
  write.csv(ip_df, ip_filename, row.names = FALSE)
  # Remove objects
  rm(ip_df, ip_dl, ip_mwleaf_dl)
  
}

ip <- read.csv(ip_filename)

For these summaries, we evaluated observation frequency by classifying flowering and fruiting observations based on the time elapsed between a “no” observation and the first “yes” observation of a given phenophase. Here, we compared first yeses that were submitted between 1 November 2024 and the present (~2025 water year) with first yeses that were submitted between 1 November 2023 and 30 September 2024 (~2024 water year).

Code
ip <- ip %>%
  # Create "season" (Nov 2024 - April 2025 = 2025)
  mutate(yes_date = parse_date_time(paste(first_yes_year, first_yes_doy),
                                    orders = "Y j"),
         yes_month = month(yes_date),
         season = case_when(
           yes_month %in% 11:12 ~ first_yes_year + 1,
           yes_month %in% 1:9 ~ first_yes_year,
           .default = NA))
# check
# count(ip, first_yes_year, yes_month, season)

# Proportion of first yeses that are preceded by a prior no within X days, 
# by phenophase and season
obsfreq_php <- ip %>%
  filter(!is.na(season)) %>%
  filter(php != "leaves") %>%
  mutate(php = factor(php, levels = c("flower", "open flower", "fruit", 
                                      "ripe fruit"))) %>%
  group_by(season, php) %>%
  summarize(nobs = n(),
            nobs3 = sum(!is.na(prior_no) & prior_no <= 3),
            nobs7 = sum(!is.na(prior_no) & prior_no <= 7),
            nobs14 = sum(!is.na(prior_no) & prior_no <= 14),
            nobs30 = sum(!is.na(prior_no) & prior_no <= 30),
            .groups = "keep") %>%
  mutate(prop3 = round(nobs3/nobs, 2),
         prop7 = round(nobs7/nobs, 2),
         prop14 = round(nobs14/nobs, 2),
         prop30 = round(nobs30/nobs, 2)) %>%
  data.frame()

# Make a bar chart for this (minus milkweed leaf observations because few of them)
obsfreq_phpl <- ip %>%
  filter(!is.na(season)) %>%
  filter(php != "leaves") %>%
  mutate(php = factor(php, levels = c("flower", "open flower", 
                                      "fruit", "ripe fruit"))) %>%
  group_by(season, php) %>%
  summarize(nobs03 = sum(!is.na(prior_no) & prior_no %in% 1:3),
            nobs07 = sum(!is.na(prior_no) & prior_no %in% 4:7),
            nobs14 = sum(!is.na(prior_no) & prior_no %in% 8:14),
            nobs30 = sum(!is.na(prior_no) & prior_no %in% 15:30),
            nobs99 = sum((!is.na(prior_no) & prior_no > 30) | is.na(prior_no)),
            .groups = "keep") %>%
  pivot_longer(cols = nobs03:nobs99,
               names_to = "cat",
               values_to = "nobs") %>%
  data.frame()

freq_by_php <- ggplot(obsfreq_phpl) +
  geom_bar(aes(x = php, y = nobs, fill = cat),
           position = "stack",
           stat = "identity") +
  scale_fill_manual(values = c("#7fc97f", "#beaed4", "#fdc086",
                               "#ffff99", "#386cb0"),
                    labels = c("1-3 days", "4-7 days", "8-14 days",
                               "15-30 days", ">30 days")) +
  facet_grid( ~ season) +
  labs(x = "Phenophase", y = 'Number of first "yes" observations',
       fill = 'Prior "no"') +
  theme_bw()

# Proportion of first yeses that are preceded by a prior no within X days, 
# by species and season
obsfreq_spp <- ip %>%
  filter(!is.na(season)) %>%
  filter(php != "leaves") %>%
  group_by(common_name, season) %>%
  summarize(nobs = n(),
            nobs3 = sum(!is.na(prior_no) & prior_no <= 3),
            nobs7 = sum(!is.na(prior_no) & prior_no <= 7),
            nobs14 = sum(!is.na(prior_no) & prior_no <= 14),
            nobs30 = sum(!is.na(prior_no) & prior_no <= 30),
            .groups = "keep") %>%
  mutate(prop3 = round(nobs3/nobs, 2),
         prop7 = round(nobs7/nobs, 2),
         prop14 = round(nobs14/nobs, 2),
         prop30 = round(nobs30/nobs, 2)) %>%
  data.frame()

# Make a bar chart for this
# Only using species that had at least 10 first yeses in one of the years:
spp10 <- unique(obsfreq_spp$common_name[obsfreq_spp$nobs >= 10])
obsfreq_sppl <- ip %>%
  filter(!is.na(season)) %>%
  filter(php != "leaves") %>%
  filter(common_name %in% spp10) %>%
  mutate(common_name = factor(common_name)) %>%
  group_by(season, common_name) %>%
  summarize(nobs03 = sum(!is.na(prior_no) & prior_no %in% 1:3),
            nobs07 = sum(!is.na(prior_no) & prior_no > 3 & prior_no < 8),
            nobs14 = sum(!is.na(prior_no) & prior_no %in% 8:14),
            nobs30 = sum(!is.na(prior_no) & prior_no %in% 15:30),
            nobs99 = sum((!is.na(prior_no) & prior_no > 30) | is.na(prior_no)),
            .groups = "keep") %>%
  pivot_longer(cols = nobs03:nobs99,
               names_to = "cat",
               values_to = "nobs") %>%
  data.frame()

freq_by_spp <- ggplot(obsfreq_sppl) +
  geom_bar(aes(x = common_name, y = nobs, fill = cat),
           position = "stack",
           stat = "identity") +
  scale_fill_manual(values = c("#7fc97f", "#beaed4", "#fdc086",
                               "#ffff99", "#386cb0"),
                    labels = c("1-3 days", "4-7 days", "8-14 days",
                               "15-30 days", ">30 days")) +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 15)) +
  facet_grid(rows = vars(season)) +
  labs(x = "Species", y = 'Number of first "yes" observations',
       fill = 'Prior "no"') +
  theme_bw() +
  theme(legend.position = "bottom")


Figure 8: Proportion of first “yes” observations that were associated with a prior “no” with a given number of days, grouped by phenophase. 2024 = November 2023 - September 2024. 2025 = November 2024 - present.


Figure 9: Proportion of first “yes” observations that were associated with a prior “no” with a given number of days, grouped by species. We excluded species with fewer than 10 “yes” observations in both water years. 2024 = November 2023 - September 2024. 2025 = November 2024 - present.

Plant information

We also extracted information that observers provided about individual plants and summarized this information by species.

Code
ids <- unique(status$individual_id)

idsl <- 1:length(ids) - 1
ids_call <- paste0("individual_id[", idsl, "]=", ids, "&", collapse = "")
ids_call <- str_sub(ids_call, end = -2)

url_base <- "https://services.usanpn.org/npn_portal/individuals/getPlantDetails.json?"
url <- paste0(url_base, ids_call)

# Get information about each plant from API
json_data <- read_json(
  path = url, 
  simplifyVector = TRUE
)
df <- json_data %>%
  select(-c(comment, plant_image_url, plant_image_upload_date))

# Append information about species to the plant data
status <- status %>%
  left_join(select(nn_spp, species_id, common_name, scientific_name), 
            by = c("common_name", "species_id"))
df <- df %>%
  left_join(distinct(status, individual_id, common_name), by = "individual_id")

# Check that all species in status-intensity data appear in plant info df
# status$individual_id[!status$individual_id %in% df$individual_id]
# df$individual_id[!df$individual_id %in% status$individual_id]
# ok

# Remove any duplicates (for some reason there are some [~20])
df <- distinct(df)
# Check that there's only one row per individual
# length(unique(df$individual_id)); length(ids)

# Look at variable structures
# count(df, patch) # 52 yes, rest are blank
# count(df, patch_size) # 45 with value, rest are blank
# count(df, shade_status) # 110 blank
# count(df, wild) # 0 or 1 or blank (110)
# count(df, watered) # 0 or 1 or blank (112)
# count(df, fertilized) # 0 or 1 or blank (123)
# count(df, gender) # Female (1) or Both (36) or blank (247)
# count(df, planting_date) # 225 blank or "--", rest with mix of year or exact date
# count(df, death_date_observed) # Only 1 value
# count(df, last_date_observed_alive) # Only 1 value
# count(df, death_reason) # 2 Unknown, rest blank

# Summarize by species
spp <- df %>%
  group_by(common_name) %>%
  summarize(n_plants = n(),
            patch_yes = sum(patch == 1),
            patch_with_size = sum(patch_size != ""),
            shade_reported = sum(shade_status != ""),
            wild_yes = sum(wild == 1),
            wild_no = sum(wild == 0),
            wild_unk = sum(wild == ""),
            water_yes = sum(watered == 1),
            water_no = sum(watered == 0),
            water_unk = sum(watered == ""),
            fert_yes = sum(fertilized == 1),
            fert_no = sum(fertilized == 0),
            fert_unk = sum(fertilized == ""),
            gender_f = sum(gender == "Female"),
            gender_b = sum(gender == "Both"),
            gender_unk = sum(gender == ""),
            planting_date = sum(planting_date %in% c("", "--")),
            death_date = sum(death_date_observed != "")) %>%
  data.frame()
  

allplants <- data.frame(
  common_name = "All plants",
  n_plants = nrow(df),
  patch_yes = sum(df$patch == 1),
  patch_with_size = sum(df$patch_size != ""),
  shade_reported = sum(df$shade_status != ""),
  wild_yes = sum(df$wild == 1),
  wild_no = sum(df$wild == 0),
  wild_unk = sum(df$wild == ""),
  water_yes = sum(df$watered == 1),
  water_no = sum(df$watered == 0),
  water_unk = sum(df$watered == ""),
  fert_yes = sum(df$fertilized == 1),
  fert_no = sum(df$fertilized == 0),
  fert_unk = sum(df$fertilized == ""),
  gender_f = sum(df$gender == "Female"),
  gender_b = sum(df$gender == "Both"),
  gender_unk = sum(df$gender == ""),
  planting_date = sum(df$planting_date %in% c("", "--")),
  death_date = sum(df$death_date_observed != "")
)

plant_info <- rbind(spp, allplants)

# Extract most useful information
plant_info2 <- plant_info %>%
  select(-c(shade_reported, gender_f, gender_b, gender_unk)) %>%
  mutate(patch = paste0(patch_yes, " (", patch_with_size, ")"),
         wild = paste0(wild_yes, "/", wild_no, "/", wild_unk),
         water = paste0(water_yes, "/", water_no, "/", water_unk),
         fertilized = paste0(fert_yes, "/", fert_no, "/", fert_unk)) %>%
  select(-c(patch_yes, patch_with_size, wild_yes, wild_no, wild_unk,
            water_yes, water_no, water_unk, fert_yes, fert_no, fert_unk))
Table 8: Summary of information observers reported about plants that have been monitored since October 2023, by species. Planting date and Death date represent the number of plants where observers provided the date of planting or the date a plant died (if relevant). Patch indicates how many of these “individuals” were monitered as a patch (i.e, multiple plants), and for those that were monitored as patches, the number of instances where the observer noted the approximate size of the patch (in square feet). Note that for the patch information, there were no data to differentiate a “no” response from unreported.
Species No. plants Planting date Death date Patch (size) Wild (Y/N/Unreported) Watered (Y/N/Unreported) Fertilized (Y/N/Unreported)
American beautyberry 47 39 0 3 (3) 16/16/15 10/19/18 2/26/19
American star-thistle 3 3 0 1 (0) 1/0/2 1/0/2 0/1/2
Canada goldenrod 2 2 0 0 (0) 1/0/1 1/0/1 0/1/1
Texas lupine 14 9 0 7 (5) 6/5/3 3/7/4 0/10/4
West Indian shrubverbena 4 2 0 3 (1) 1/3/0 3/0/1 0/3/1
aquatic milkweed 2 2 0 1 (1) 0/1/1 1/0/1 0/1/1
black elderberry 1 1 0 0 (0) 0/0/1 0/0/1 0/0/1
blackeyed Susan 3 3 0 0 (0) 0/0/3 0/0/3 0/0/3
blue mistflower 8 7 0 2 (2) 0/2/6 2/0/6 0/1/7
broadleaf milkweed 2 2 0 2 (2) 2/0/0 0/2/0 0/2/0
button eryngo 3 2 0 0 (0) 0/3/0 0/3/0 1/2/0
cardinalflower 1 1 0 0 (0) 0/1/0 1/0/0 0/1/0
common buttonbush 24 22 0 0 (0) 6/10/8 3/12/9 0/14/10
common sunflower 8 7 0 5 (4) 3/2/3 2/4/2 0/4/4
eastern baccharis 7 6 0 0 (0) 3/0/4 0/3/4 0/3/4
eastern purple coneflower 6 5 0 0 (0) 0/3/3 3/0/3 0/3/3
eastern redbud 38 26 0 0 (0) 9/16/13 11/16/11 2/23/13
firewheel 6 5 0 1 (1) 0/3/3 3/0/3 1/2/3
golden crownbeard 1 1 0 0 (0) 0/0/1 0/0/1 0/0/1
green antelopehorn 4 3 0 1 (1) 1/2/1 2/1/1 0/3/1
horsetail milkweed 3 3 0 2 (2) 3/0/0 0/3/0 0/3/0
lemon beebalm 2 2 0 1 (1) 1/0/1 0/1/1 0/1/1
mealycup sage 7 3 0 3 (3) 0/6/1 5/1/1 1/5/1
palmleaf thoroughwort 2 2 0 1 (1) 0/2/0 2/0/0 0/2/0
purple passionflower 6 3 0 2 (2) 0/3/3 2/2/2 0/4/2
purple prairie clover 5 5 0 0 (0) 0/0/5 0/0/5 0/0/5
red maple 26 20 1 0 (0) 7/6/13 4/10/12 0/14/12
rubber rabbitbrush 5 4 0 1 (1) 3/1/1 0/4/1 1/2/2
seaside goldenrod 1 1 0 1 (1) 0/1/0 1/0/0 0/1/0
showy milkweed 4 3 0 2 (2) 2/2/0 2/2/0 0/3/1
spider milkweed 3 3 0 1 (1) 1/0/2 0/1/2 0/1/2
straggler daisy 1 1 0 0 (0) 0/0/1 0/0/1 0/0/1
tall blazing star 1 1 0 0 (0) 0/1/0 1/0/0 0/1/0
trumpet honeysuckle 1 0 0 0 (0) 0/1/0 1/0/0 0/0/1
turkey tangle fogfruit 6 6 0 2 (2) 1/1/4 1/1/4 0/2/4
upright prairie coneflower 1 1 0 0 (0) 0/0/1 0/0/1 0/0/1
wax mallow 9 6 0 2 (2) 0/3/6 3/0/6 1/2/6
white crownbeard 10 8 0 4 (3) 5/2/3 2/4/4 0/6/4
wild bergamot 7 5 0 4 (4) 2/4/1 4/2/1 0/5/2
All plants 284 225 1 52 (45) 74/100/110 74/98/112 9/152/123